\chapter{Proofs of Theoretical Results}
\label{app:proofs}

\section[Fixed-Point Existence]{Fixed-Point Existence (\cref{thm:fixed_point})}

\begin{proof}
Construct the Banach space $\mathcal{X} = \prod_{i \in V} \StateSpace_i$ with metric $d(\mathbf{x}, \mathbf{y}) = \max_i \|x_i - y_i\|$. Under \cref{assump:regularity}, each micro-update map $F_i(\cdot, \Phi)$ is a contraction with modulus $\kappa_i < 1$. Define the macro-operator $\mathcal{G}(\Phi) = (G_1(\Phi), \ldots, G_L(\Phi))$, which is Lipschitz with constant $L_G < 1$ by assumption. Combining the micro- and macro-level maps, define the joint operator
\begin{equation}
\mathcal{T}(\mathbf{x}, \Phi) = (F(\mathbf{x}, \Phi), \mathcal{G}(\Phi)).
\end{equation}
The product metric $D((\mathbf{x}, \Phi), (\mathbf{y}, \Psi)) = \max\{d(\mathbf{x}, \mathbf{y}), \|\Phi - \Psi\|\}$ yields contraction modulus $\kappa = \max\{\max_i \kappa_i, L_G\} < 1$. Banach's fixed-point theorem ensures a unique fixed point $(\mathbf{x}^*, \Phi^*)$.
\end{proof}

\section[Option Policy Improvement]{Option Policy Improvement (\cref{thm:option_improvement})}

\begin{proof}
Let $\mathcal{B}$ be the hierarchical Bellman operator acting on option-value functions $Q_o(b)$. Under bounded rewards and discount factor $\gamma < 1$, $\mathcal{B}$ is a contraction in the supremum norm, so the option values are well defined. Policy improvement updates options via $\pi'(o \mid b) = \mathbb{I}[o = \arg\max_{o'} Q_{o'}(b)]$. The standard policy-improvement argument then guarantees $V^{\pi'} \geq V^{\pi}$. The presence of delayed feedback introduces at most a constant bias, which is accounted for in the regret analysis of \cref{chap:multiagent} and leaves the monotonicity intact.
\end{proof}

\section[Bounded Utility]{Bounded Utility (\cref{prop:bounded_utility})}

\begin{proof}
The objective $U$ is continuous over the compact feasible set defined by the resource constraints, so the extreme value theorem guarantees that a maximizer exists. For finiteness, note that the weights $w_Q, w_L, w_C, w_R$ are finite and the reward/penalty functions are bounded; thus $U$ cannot diverge. Finally, mixed strategies over options correspond to convex combinations of deterministic policies; since the feasible set is convex, such combinations remain feasible, and the optimum is therefore attained within the feasible region.
\end{proof}

\section[Hierarchical Critical Temperature]{Hierarchical Critical Temperature (\cref{thm:hierarchical_critical_temp})}

\begin{proof}
Linearize the self-consistency equations around the disordered solution: $m_\ell = \sum_{\ell'} \beta J^{(\ell,\ell')}_{\text{eff}} m_{\ell'}$. Writing this in matrix form as $\mathbf{m} = \mathbf{M}(\beta)\, \mathbf{m}$, non-trivial solutions exist when $\det(\mathbf{I} - \mathbf{M}(\beta)) = 0$, i.e., when $1$ is an eigenvalue of $\mathbf{M}(\beta)$. Since the entries of $\mathbf{M}(\beta)$ scale linearly with $\beta$, the largest eigenvalue of the effective coupling matrix determines the smallest inverse temperature $\beta_c$ satisfying this condition. Translating to the critical temperature $T_c = 1/(k_B \beta_c)$ yields the claim.
\end{proof}

\section[Emergence Bounds]{Emergence Bounds (\cref{thm:emergence_bounds})}

\begin{proof}
Effective information is bounded above by the maximum entropy of the target level: $EI_{\ell \rightarrow \ell+1} \leq H_{\max}(S_{\ell+1})$. Similarly $EI_{\ell \rightarrow \ell} \geq H_{\min}(S_\ell) > 0$ under the assumption of non-degenerate interventions. Substitute into \cref{def:emergence_strength} to obtain
\begin{equation}
\mathcal{E}_{\ell \rightarrow \ell+1} \leq \frac{H_{\max}(S_{\ell+1}) - H_{\min}(S_\ell)}{H_{\min}(S_\ell)}.
\end{equation}
The lower bound of zero follows because the numerator is non-negative.
\end{proof}

\section[Information Bottleneck Phase Transitions]{Information Bottleneck Phase Transitions (\cref{thm:bottleneck_phases})}

\begin{proof}
The information bottleneck Lagrangian $\mathcal{L} = I(X; T) - \beta I(T; Y) - \sum_{x} \lambda(x) \bigl(\sum_t P(t|x) - 1\bigr)$ yields the Euler--Lagrange equations for the encoder $P(t|x)$. Linearize these fixed-point equations around the solution at $\beta = \beta_c$; the eigenvalues of the resulting Jacobian determine its stability. When the minimum eigenvalue crosses zero, the solution loses stability and new solution branches emerge, indicating a phase transition.
\end{proof}

\section[Queue Stability]{Queue Stability (\cref{lem:queue_stability})}

\begin{proof}
For the birth-death chain with birth rate $\lambda$ and death rate $\mu$, the drift of $V(n) = n^2$ is
\begin{equation}
\Delta V = \lambda [(n+1)^2 - n^2] + \mu [(n-1)^2 - n^2] = (2\lambda - 2\mu) n + (\lambda + \mu).
\end{equation}
When $\lambda < \mu$, the coefficient of $n$ is negative, ensuring strictly negative drift outside a finite set. The Foster--Lyapunov criterion then guarantees positive recurrence.
\end{proof}

\section[Useful Noise Interval]{Useful Noise Interval (\cref{prop:useful_noise})}

\begin{proof}
Given the concavity $J''(\sigma) < 0$ on $(0, \sigma^*)$, the derivative $J'$ is strictly decreasing on this interval. Setting $J'(\sigma^*) = 0$ therefore yields a unique maximizer in the interval. Evaluation of the hierarchical performance functional $J$ thus supports selecting non-zero noise intensities.
\end{proof}

\section[Bandit Regret]{Bandit Regret (\cref{thm:bandit_regret})}

\begin{proof}
Let $A_t$ be design matrices accumulating context vectors. LinUCB regret satisfies
\begin{equation}
R_T \leq 2 \sqrt{T d \log\left(1 + \frac{T L^2}{\lambda}\right)} + d_\ell,
\end{equation}
where $L$ bounds the context norms. The additive $d_\ell$ term accounts for delayed feedback; see the delayed-bandit analysis of~\cite{joulani2013} for the derivation of this additional term. Combining the two terms yields the stated bound.
\end{proof}
